#include <stdio.h>

#include <cuda_runtime.h>

#include "hello.cuh"

// Debug kernel: each launched thread prints "hello <index>", where <index>
// is its flat global index along the x dimension
// (threadIdx.x + blockIdx.x * blockDim.x). Takes no arguments and writes
// no memory; only the x components of the launch configuration are used.
__global__ void hello(){
    const int globalIdx = threadIdx.x + blockIdx.x * blockDim.x;
    printf("hello %d\n", globalIdx);
}

// Launches the hello kernel with one block of three threads, waits for the
// device to finish, then prints "Done \n" on the host.
//
// Returns 0 on success. If the launch is rejected, or the device reports an
// error while synchronizing, the CUDA error string is written to stderr and
// 1 is returned (in which case "Done" is not printed).
int test_cuda(void){
    hello<<<1, 3>>>();

    // A kernel launch yields no status of its own; launch failures have to
    // be queried explicitly right after the launch.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "hello kernel launch failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    // Wait for the kernel to complete; errors raised during execution are
    // returned by this call.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "hello kernel execution failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    printf("Done \n");

    return 0;
}
